#! /usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created on 2016-06-09

@author: xiezc
'''
import pymysql
import datetime
        
class Date_manager(object):
    """Database helper for the song table used by the downloader.

    Every public method opens its own MySQL connection with the settings
    passed to the constructor, runs one statement and closes the connection
    again; nothing is kept open between calls.
    """

    def __init__(self, host='localhost', user='root', passwd='rootxzc', db='mytest', port=3306):
        """Store the MySQL connection settings.

        Args:
            host: Host name of the MySQL server.
            user: User name used to log in.
            passwd: Password used to log in.
            db: Name of the database to select.
            port: TCP port of the MySQL server.
        """
        self.host = host
        self.user = user
        self.passwd = passwd
        self.db = db
        self.port = port

    def _connect(self):
        """Open a new UTF-8 connection using the stored settings."""
        # Keyword arguments are required: PyMySQL 1.0 made connect()
        # keyword-only, so the former positional call raises TypeError there.
        return pymysql.connect(
            host=self.host,
            user=self.user,
            password=self.passwd,
            database=self.db,
            port=self.port,
            charset='utf8',
        )

    def _execute_many(self, sql, parms):
        """Run ``sql`` once per row in ``parms`` and commit.

        The connection is opened before the ``try`` blocks so that a failed
        connect propagates its own error instead of being masked by cleanup
        code touching names that were never bound.
        """
        conn = self._connect()
        try:
            cur = conn.cursor()
            try:
                cur.executemany(sql, parms)
                conn.commit()
            finally:
                cur.close()
        finally:
            conn.close()

    def update_song_hotNum(self, old_songs):
        """Increment ``hotNum`` by one for every song id in ``old_songs``.

        Args:
            old_songs: Iterable of ``sid`` values of songs already stored.

        Raises:
            Exception: Whatever the database driver raises; errors are not
                swallowed here.
        """
        # One real single-element tuple per row ("(url)" is not a tuple).
        parms = [(sid,) for sid in old_songs]
        # NOTE(review): this statement names the table "hotSong" while the
        # insert and select in this class use "hotsong". MySQL table names
        # can be case-sensitive depending on the server platform/settings,
        # so confirm the real table name and align the three statements.
        self._execute_many("update hotSong set hotNum=hotNum+1 where sid=%s", parms)
        print("更新了%d首歌曲的hotNum值" % len(parms))

    def add_song2db(self, songs):
        """Insert a batch of songs, each with an initial ``hotNum`` of 1.

        Args:
            songs: List of dicts; each must provide the keys ``sid``,
                ``author``, ``sname``, ``artistId``, ``artistName``,
                ``lrcLink`` and ``songLink``.

        Raises:
            KeyError: If a song dict lacks one of the required keys.
            Exception: Whatever the database driver raises.
        """
        # All rows of one batch share the same creation timestamp.
        created = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        parms = [
            (song["sid"], song["author"], song["sname"], song["artistId"],
             song["artistName"], song["lrcLink"], song["songLink"], created)
            for song in songs
        ]
        self._execute_many(
            "insert into hotsong(sid,author,sname,artistId,artistName,lrcLink,songLink,createTime,hotNum)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,1)",
            parms,
        )
        print('向数据库中添加了%d首新歌曲' % len(parms))

    def get_song_set(self):
        """Return the set of all ``sid`` values currently stored.

        Callers use the set to decide whether a song is already known and
        therefore does not need to be downloaded again.

        Returns:
            A set of ids. Database errors are reported on stdout and result
            in the ids collected so far (normally an empty set) instead of
            an exception.
        """
        sidset = set()
        try:
            conn = self._connect()
            try:
                cur = conn.cursor()
                try:
                    cur.execute('select sid from hotsong')
                    sidset.update(row[0] for row in cur.fetchall())
                finally:
                    cur.close()
            finally:
                conn.close()
        except Exception as e:
            # str() is required: concatenating the exception object itself
            # raised TypeError and hid the original problem.
            print("get_song发生异常" + str(e))
        print("获得数据库的所有歌曲id的set集合，set大小为：%d" % len(sidset))
        return sidset

'''
Created on 2016-06-06

@author: xiezc
'''
from urllib import request
from bs4 import BeautifulSoup
import json
import os.path

import urllib.error
import socket


# http://www.oschina.net/code/snippet_60100_25076
class Parse_html(object):
    """Parse the hot-song chart page and download songs and their lyrics.

    Files are written below ``music_dir`` into one sub-folder per month
    (``YYYYMM``); the song dicts are updated in place with the relative
    paths of the saved files.
    """

    # Browser-like User-Agent sent with the media download requests.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36'
    # Seconds after which a single HTTP request is abandoned.
    _TIMEOUT = 6
    # Characters removed from generated file names: path separators and the
    # characters that are not allowed in Windows file names.
    _UNSAFE_CHARS = str.maketrans('', '', '\\/:*?"<>|')

    def __init__(self, root_url, music_dir="../../../../music/"):
        """Remember the chart URL and the download directory.

        Args:
            root_url: URL of the chart page read by ``get_top_hotDay``.
            music_dir: Directory below which the monthly folders are
                created; relative paths are resolved against the current
                working directory.
        """
        self.root_url = root_url
        self.music_dir = music_dir

    def get_top_hotDay(self, sidset):
        """Fetch the chart page and split its songs into new and known ones.

        Args:
            sidset: Collection of song ids that are already stored.

        Returns:
            A tuple ``(new_songs, old_songs)``: ``new_songs`` is a list of
            dicts with the keys ``sid``, ``author`` and ``sname`` for songs
            not in ``sidset``; ``old_songs`` is the list of ids that are.
        """
        new_songs = []
        old_songs = []
        print(self.root_url)
        # Use the configured URL; it used to be printed but then ignored in
        # favour of a hard-coded address.
        with request.urlopen(self.root_url, timeout=self._TIMEOUT) as response:
            resp_html = response.read().decode("utf8")
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(resp_html, "html.parser")
        for li in soup.select("li.bb-dotimg.clearfix.song-item-hook.csong-item-hook"):
            item = json.loads(li['data-songitem'])["songItem"]
            sid = item["sid"]  # the id is needed later to request the download links
            if sid in sidset:
                old_songs.append(sid)
                continue
            new_songs.append({'sid': sid, 'author': item["author"], 'sname': item["sname"]})
        print('解析网页获得热歌榜中新歌曲集合new_songs大小为：%d, 老歌曲old_song集合的大小：%d' % (len(new_songs), len(old_songs)))
        return new_songs, old_songs

    def download(self, song):
        """Download the metadata, audio file and lyrics of one song.

        Args:
            song: Dict with at least ``sid`` and ``sname``; it is updated
                in place.

        Returns:
            The updated ``song`` dict, or ``False`` when the service offers
            no download link for it.
        """
        if not self.__download_info(song):
            return False
        self.__download_song(song)
        self.__download_lrc(song)
        return song

    def download_songs(self, songs, max_retries=3):
        """Download several songs and return the ones that succeeded.

        Args:
            songs: Iterable of song dicts (see ``download``).
            max_retries: Maximum number of attempts per song when a network
                error or timeout occurs (at least one attempt is made).

        Returns:
            A list with the successfully downloaded song dicts. Songs that
            have no download link, keep failing after ``max_retries``
            attempts, or raise any other error are left out, so an empty
            (falsy) list means that nothing was downloaded.
        """
        downloaded = []
        for song in songs:
            # Bounded retry: a permanently unreachable URL must not loop forever.
            for _attempt in range(max(1, max_retries)):
                try:
                    if self.download(song):
                        downloaded.append(song)
                except urllib.error.URLError as e1:
                    print("超时异常：" + str(e1))
                    continue
                except socket.timeout as e2:
                    print("超时socket:" + str(e2))
                    continue
                except Exception as e:
                    print("Exception：" + str(e))
                break
        return downloaded

    @classmethod
    def _safe_name(cls, name):
        """Return ``name`` without characters that are unsafe in file names."""
        return name.translate(cls._UNSAFE_CHARS)

    def _save(self, url, song, extension):
        """Download ``url`` into this month's folder and return its relative path.

        The file is named ``<sname>_<sid>.<extension>``; the id is part of
        the name to avoid clashes between songs with the same title.
        """
        req = request.Request(url, headers={'User-Agent': self._USER_AGENT})
        with request.urlopen(req, timeout=self._TIMEOUT) as response:
            data = response.read()
        month = datetime.datetime.now().strftime('%Y%m')
        folder = os.path.join(self.music_dir, month)
        # Also creates missing parent folders and tolerates an existing one.
        os.makedirs(folder, exist_ok=True)
        # %s formatting also copes with numeric ids; the sanitised name is
        # used both on disk and in the returned link so the two always match.
        file_name = self._safe_name('%s_%s.%s' % (song["sname"], song["sid"], extension))
        with open(os.path.join(folder, file_name), 'wb') as fp:
            fp.write(data)
        return month + '/' + file_name

    def __download_info(self, song):
        """Look up the download links of ``song`` and store them in the dict.

        Returns:
            ``True`` when a song link was found and ``songLink``,
            ``artistId``, ``artistName``, ``lrcLink`` and ``format`` were
            written into ``song``; ``False`` otherwise.
        """
        url = 'http://music.baidu.com/data/music/links?songIds=%s' % song['sid']
        with request.urlopen(url, timeout=self._TIMEOUT) as response:
            resp_json = json.loads(response.read().decode("utf8"))
        # 22000 is the only error code the original code treated as success.
        if resp_json["errorCode"] != 22000:
            return False
        songList = resp_json["data"]["songList"]
        if not songList or not songList[0]["songLink"]:
            return False
        info = songList[0]
        song["songLink"] = info["songLink"]  # download URL of the audio file
        song["artistId"] = info["artistId"]
        song["artistName"] = info["artistName"]
        song["lrcLink"] = info["lrcLink"]  # download URL of the lyrics
        song["format"] = info["format"]  # file extension of the audio file
        return True

    def __download_song(self, song):
        """Save the audio file and replace ``songLink`` with its relative path."""
        song['songLink'] = self._save(song["songLink"], song, song["format"])

    def __download_lrc(self, song):
        """Save the lyrics and replace ``lrcLink`` with their relative path.

        Songs without a lyrics link are printed and otherwise left unchanged.
        """
        url = song["lrcLink"]
        if not url:
            print(song)
            return
        song['lrcLink'] = self._save(url, song, 'lrc')
        
        
def main():
    """Download the song described on stdin and record it in the database.

    One line containing a JSON object is read from standard input and handed
    to the downloader. When the download yields nothing a notice is printed;
    otherwise the result is inserted into the database and echoed as JSON.
    """
    parse_html = Parse_html('http://music.baidu.com/top/dayhot')
    date_manager = Date_manager()

    # The single JSON object from stdin is the only download candidate.
    requested = [json.loads(input())]

    songs = parse_html.download_songs(requested)
    if songs:
        # Insert everything that was downloaded in one batch, then report it.
        date_manager.add_song2db(songs)
        print(json.dumps(songs))
    else:
        print("没找到歌曲")
    

# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
    
   
